(You should have these libraries installed before the workshop session. If not, please ask for help) igraph: dplyr: Remotes:
## Warning: package 'igraph' was built under R version 4.2.3
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
# ---- Load the three input tables -------------------------------------------
# Each URL assignment sits on its own line: when it is fused onto the end of a
# `## ...` comment the variable is never created and the following fread()
# call fails with "object not found".

# 1. CML key-gene table (CSV hosted in the workshop repository).
CMLfile <- "https://raw.githubusercontent.com/chengwailei/AI_Workshop_2023/main/NetworkAnalysis_081123/Data/CML_MalaCard_Gene.csv"
CML_keygenes_df <- read.csv(CMLfile)
rmarkdown::paged_table(CML_keygenes_df) ## don't run this

# 2. STRING v12.0 physical-links table for organism 9606 (space-separated,
#    with a header row).
STRINGfile <- "https://stringdb-downloads.org/download/protein.physical.links.v12.0/9606.protein.physical.links.v12.0.txt.gz"
interactome_df <- fread(STRINGfile, sep = " ", header = TRUE)
interactome_df <- as.data.frame(interactome_df)
rmarkdown::paged_table(interactome_df) ## don't run this

# 3. STRING v12.0 alias table for organism 9606 (tab-separated). `fill = TRUE`
#    pads rows that have fewer fields than the others.
STRINGAliasFile <- "https://stringdb-downloads.org/download/protein.aliases.v12.0/9606.protein.aliases.v12.0.txt.gz"
alias_df <- fread(STRINGAliasFile, sep = "\t", fill = TRUE)
alias_df <- as.data.frame(alias_df)
colnames(alias_df) <- c("protein_name", "alias", "source")
rmarkdown::paged_table(alias_df) ## don't run this

# We are interested in protein-coding genes that are likely pathogenic in the
# MalaCards gene set.
# Select the symbols of protein-coding genes whose evidence is
# "Diseases_inferred". `%in%` is used instead of `==` because it never returns
# NA: a row with a missing Category or Evidence is simply excluded instead of
# producing an NA entry in the gene list.
is_key_gene <- CML_keygenes_df$Category %in% "Protein Coding" &
  CML_keygenes_df$Evidence %in% "Diseases_inferred"
CML_keygenes_list <- unique(CML_keygenes_df$Symbol[is_key_gene])
CML_keygenes_list
## [1] "ABL1" "BCR" "NRAS" "KRAS" "SF3B1" "SETBP1"
## [7] "RUNX1" "CSF3R" "BRAF" "IFNA1" "KIT" "CRKL"
## [13] "JAK2" "PDGFRB" "IL3" "STAT5A" "CSF3" "FLT3"
## [19] "CD34" "LYN" "CSF2" "KITLG" "IRF8" "PDGFRA"
## [25] "WT1" "IFNA2" "SRC" "SLC22A1" "HCK" "MCL1"
## [31] "ETV6" "MYC" "ABCB1" "CASP3" "BCL2" "BCL2L1"
## [37] "NUP98" "CD33" "MPL" "PRAME" "DNTT" "STAT5B"
## [43] "ANXA5" "DOK1" "GATA2" "GRB2" "MECOM" "HSP90AA1"
## [49] "HRAS" "PRTN3" "MYB" "THPO" "HOXA9" "GAB2"
## [55] "AXL" "U2AF1" "PTK2B" "EPO" "CASP9" "FGFR1"
## [61] "GATA1" "CD177" "TKT" "MPO" "PCBP2" "CBL"
## [67] "BACH2" "STAT1" "BIRC5" "ABCC1" "NKG7" "RASA1"
## [73] "IKZF1" "RARA" "CDKN1A" "DNMT1" "CDKN2B" "MEIS1"
## [79] "KMT2A" "KIR3DL1" "FOXO3" "CEACAM8" "EIF4EBP1" "NTRK3"
## [85] "PIK3CG" "SIPA1" "HSPA4" "SOCS1" "CRK" "BMI1"
## [91] "CCND2" "H2AC18" "TCN1" "MAPK8" "GAPDH" "CDK2"
## [97] "JUNB" "MAPK9" "MAPK1" "FCGR3B" "PTPN11" "NF1"
## [103] "PXN" "HDAC9" "FN1" "CSF2RA" "HOXA10" "CCL3"
## [109] "KLRK1" "IL11" "FES" "CEBPB" "MNDA" "TNFSF10"
## [115] "MAPK14" "PTPRC" "RAF1" "EVPL" "TAL1" "ELANE"
## [121] "APAF1" "EPOR" "NTRK1" "CTLA4" "CBFB" "GZMB"
## [127] "HSPA8" "NME1" "G6PD" "ASS1" "TNFRSF10B" "H4C16"
## [133] "SOCS2" "EIF4E" "PLK1" "ITGB3" "PTK2" "ADA"
## [139] "SHC1" "AKT1" "CCNA2" "TP53" "INPP5D" "EGFR"
## [145] "NFKBIA" "CREB1" "PML" "TNFRSF10A" "SOCS3" "CDC25A"
## [151] "SELP" "IRF4" "LEF1" "CREBBP" "KDM4C" "ERBB2"
## [157] "BIRC3" "CD38" "PTPN6" "CCR7" "PRKCD" "ENG"
## [163] "ALK" "STAT3" "PTPN1" "SIRPA" "NPM1" "CTNNB1"
## [169] "ICOSLG" "ASXL1" "CCR6" "CD19" "CD8A" "LUC7L2"
## [175] "LCK" "VEGFA" "CCND1" "PTEN" "TET2" "ITGB1"
## [181] "IRF2" "PRODH" "BCL2L11" "ABCG2" "ABL2" "MLLT3"
## [187] "CDKN1C" "CD4" "NOTCH1" "IL2" "CALR" "IL6"
## [193] "ACTB" "CEBPA" "MTOR" "GOLGA4" "HLA-A" "JAK1"
## [199] "DNMT3A"
Try and edit the code and answer quiz questions 1 and 2 on Slido.
Slido.com: # 1675300
We need to subset the alias table to look up the Ensembl protein IDs by gene name. Note that some aliases may not have a corresponding protein name, either because the interactome uses a different alias for the gene or because the gene is not recorded at all.
To extract the CML subnetwork, we keep the interactions in which both proteins are among our genes of interest.
# Keep only the interactions whose two endpoints are both in the key-gene
# Ensembl ID list.
CML_network <- interactome_df[
  with(
    interactome_df,
    protein1 %in% keygenes_Ensembl_list & protein2 %in% keygenes_Ensembl_list
  ),
]
rmarkdown::paged_table(CML_network) ## don't run this
# We now have the list of interactions, which is huge (4,232 interactions), but
# not all of them are relevant. STRING provides a separate column,
# "combined_score", which denotes how confident STRING is in each interaction.
# It reflects whether the interaction was merely predicted from co-mentions in
# the literature or was tested experimentally.
A medium, widely accepted cut-off is 400. We aim for higher confidence and use a cut-off of 800. This can be done by:
# Retain only the high-confidence interactions (combined_score of at least 800).
CML_network <- CML_network[CML_network[["combined_score"]] >= 800, ]
rmarkdown::paged_table(CML_network) ## don't run this
# Try and answer quiz question 3 on Slido!
Now we have a protein network but the Ensembl Protein IDs are confusing. We can add the alias name from the keygenes_Ensembl that we have generated.
# Attach a readable alias to each endpoint of every interaction.
#
# merge() returns the key column first, followed by the remaining columns of x
# and then of y. After merging on "protein2" the column order is therefore
# protein2, protein1, ..., so assigning column names by position would label
# the protein2 IDs as "protein1" (and vice versa) and leave the alias columns
# pointing at the wrong ID column. The alias column is renamed by name instead,
# and the columns are put back into a fixed order at the end.
alias_lookup <- keygenes_Ensembl[c("protein_name", "alias")]

CML_network <- merge(CML_network,
  alias_lookup,
  by.x = "protein1",
  by.y = "protein_name",
  all.x = TRUE # keep interactions even when no alias is found
)
names(CML_network)[names(CML_network) == "alias"] <- "protein1_alias"

CML_network <- merge(CML_network,
  alias_lookup,
  by.x = "protein2",
  by.y = "protein_name",
  all.x = TRUE
)
names(CML_network)[names(CML_network) == "alias"] <- "protein2_alias"

CML_network <- CML_network[c(
  "protein1", "protein2", "combined_score",
  "protein1_alias", "protein2_alias"
)]
rmarkdown::paged_table(CML_network) ## don't run this
# To ensure we can retrieve the same result in each run, saving the network is
# helpful. It also makes it easy to feed the result into other pipelines.
# Write the annotated edge list to disk and read it straight back in, so that
# the following steps work from the saved copy.
write.csv(CML_network, file = "CML_network.csv", row.names = FALSE)
CML_network <- read.csv(file = "CML_network.csv")
rmarkdown::paged_table(CML_network) ## don't run this
# Up until now, we have been subsetting the network as a data table (a.k.a. an
# edge list, as the table stores the edges of the network). To run any
# algorithms, we need to read it in as a network. The package we are using is
# igraph, which you should have loaded at the start of the tutorial.
To visualise the graph, we use the plot function
# Draw the network with the default layout on a canvas without margins.
par(mar = c(0, 0, 0, 0))
plot(
  g,
  vertex.color = "white",
  vertex.size = 2,
  edge.color = rgb(0.5, 0.5, 0.5, alpha = 0.2), # semi-transparent grey edges
  vertex.label.cex = 0.7
)
The nodes are all clustered together. The best way to overcome this is
to adjust the layout.
par(mar = rep(2, 4)) # small margins on all four sides

## adjusting layout
# Compute the vertex coordinates once with the Davidson-Harel layout
# (with_dh) and reuse them in the plot call below.
lay <- layout_(
  g,
  with_dh(weight.edge.lengths = edge_density(g) / 1000)
)
plot(g,
  vertex.color = "white",
  vertex.size = 2,
  edge.color = rgb(0.5, 0.5, 0.5, 0.2),
  vertex.label.cex = 0.7,
  layout = lay
)
# Try and answer quiz question 4 on Slido!
In the previous lectures, we learnt about several centrality scores. The algorithms measure the topology of the network and give us some insights on the node or edge importances. In this exercise, we use the CML network and calculate some centralities we have described.
Degree centrality measures the number of edges connected to the nodes. We use the “degree()” function from igraph to calculate the degree centrality of the CML network.
# Degree of every vertex, counting all incident edges, printed from the most
# to the least connected node.
# NOTE(review): every degree printed below is even, which suggests each
# interaction is stored twice (A-B and B-A) in the graph -- confirm against
# the chunk that builds `g`.
degree_centrality <- degree(g, mode = "all")
sort(degree_centrality, decreasing = TRUE, na.last = TRUE)
## GRB2 SRC JAK2 EGFR SHC1 PTPN11 STAT3 JAK1
## 58 52 46 46 42 42 38 34
## TP53 HSP90AA1 LYN CBL CRK RUNX1 CRKL PTK2
## 32 32 30 30 24 24 24 24
## CREBBP ABL1 STAT5A EPOR LCK STAT5B HRAS STAT1
## 22 22 20 20 18 18 18 18
## ERBB2 PDGFRA CTNNB1 KRAS CDK2 CDKN1A MAPK1 FOXO3
## 18 16 16 14 14 14 14 14
## PTK2B FN1 HSPA4 RAF1 HSPA8 BCL2L1 PDGFRB CCND1
## 14 14 14 12 12 12 12 12
## APAF1 ITGB3 PTPN6 GAB2 SOCS3 IL3 IL6 RASA1
## 12 12 12 10 10 10 10 10
## CSF3 NRAS HCK BCL2 ITGB1 SOCS2 EPO CSF3R
## 10 10 10 10 10 8 8 8
## CCND2 CDKN1C BRAF KIT CEBPA CCNA2 GATA1 IL2
## 8 8 8 8 8 8 8 8
## SOCS1 ACTB NTRK1 CD8A PRTN3 THPO CSF2 CREB1
## 8 8 8 6 6 6 6 6
## LEF1 CASP3 DNMT1 CEBPB MYC NPM1 PTPN1 KITLG
## 6 6 6 6 6 6 6 6
## GATA2 ALK TAL1 MAPK8 MAPK9 AKT1 CALR HLA-A
## 6 6 6 6 6 6 6 6
## BCL2L11 PIK3CG NTRK3 CD19 SIRPA INPP5D CTLA4 TNFSF10
## 6 6 6 6 6 6 6 4
## TNFRSF10B ELANE MPO TNFRSF10A CCL3 PML NF1 FCGR3B
## 4 4 4 4 4 4 4 4
## KMT2A CASP9 MTOR DNMT3A GZMB AXL MCL1 MPL
## 4 4 4 4 4 4 4 4
## FLT3 CD4 MAPK14 CD177 TCN1 IRF4 RARA PLK1
## 4 4 2 2 2 2 2 2
## HOXA9 DOK1 CBFB MECOM BIRC3 PTEN MEIS1 CD34
## 2 2 2 2 2 2 2 2
## EIF4EBP1 BCR ABL2 BMI1 ASXL1 IKZF1 KIR3DL1 CD33
## 2 2 2 2 2 2 2 2
## GAPDH PTPRC CCR6 CCR7 ICOSLG IFNA1
## 2 2 2 2 2 2
We can visualise the degree centrality on the network plot itself.
par(mar = rep(0, 4)) # remove the plot margins

## define colour
X <- length(V(g))
node_degree <- degree(g)
# The palette is indexed by degree + 1 so that a vertex of degree 0 still gets
# a colour (index 0 would silently drop the element and misalign the colours).
# It is made long enough for the largest degree, so no index falls outside it.
colors <- heat.colors(max(X, node_degree + 1), rev = TRUE)
plot(g,
  # layout = lay,
  vertex.color = colors[node_degree + 1],
  vertex.size = 10,
  vertex.label.cex = 0.5,
  vertex.label.color = "black",
  bg = ""
)
# Closeness centrality describes how far a node is from the other nodes. We use
# the "closeness()" function from igraph to calculate the closeness centrality
# of the CML network.
# Closeness of every vertex, printed from the highest to the lowest score.
closeness_centrality <- closeness(g)
sort(closeness_centrality, decreasing = TRUE, na.last = TRUE)
## HOXA9 MEIS1 BMI1 ASXL1 TNFSF10 TNFRSF10B
## 1.000000000 1.000000000 1.000000000 1.000000000 0.500000000 0.500000000
## TNFRSF10A CCL3 PRTN3 CCR6 CCR7 ELANE
## 0.500000000 0.500000000 0.333333333 0.333333333 0.333333333 0.250000000
## MPO CD177 SRC STAT3 EGFR GRB2
## 0.250000000 0.200000000 0.003787879 0.003521127 0.003521127 0.003436426
## PTPN11 HSP90AA1 SHC1 JAK2 LYN ERBB2
## 0.003367003 0.003322259 0.003289474 0.003267974 0.003174603 0.003144654
## JAK1 TP53 CREBBP CBL PTK2 STAT5A
## 0.003125000 0.003105590 0.003067485 0.003067485 0.003058104 0.003030303
## MAPK1 ABL1 LCK CRK STAT1 RUNX1
## 0.003012048 0.003012048 0.002994012 0.002967359 0.002958580 0.002958580
## STAT5B PDGFRA CTNNB1 HCK CRKL PDGFRB
## 0.002949853 0.002941176 0.002932551 0.002932551 0.002890173 0.002873563
## PTK2B GAB2 HRAS FOXO3 AKT1 NPM1
## 0.002873563 0.002808989 0.002785515 0.002777778 0.002777778 0.002770083
## ITGB3 FN1 AXL RAF1 EPOR PTPN6
## 0.002754821 0.002739726 0.002732240 0.002724796 0.002724796 0.002666667
## HSPA4 PTPN1 CEBPB NTRK1 IL6 DNMT1
## 0.002659574 0.002645503 0.002638522 0.002617801 0.002590674 0.002590674
## NTRK3 RASA1 ITGB1 ALK SIRPA KIT
## 0.002590674 0.002570694 0.002557545 0.002551020 0.002538071 0.002531646
## HSPA8 CDKN1A INPP5D CD19 MYC BRAF
## 0.002518892 0.002518892 0.002512563 0.002493766 0.002463054 0.002457002
## APAF1 CSF3R ACTB IRF4 BCL2 FLT3
## 0.002457002 0.002444988 0.002444988 0.002439024 0.002439024 0.002421308
## CREB1 PIK3CG LEF1 KRAS CALR CTLA4
## 0.002403846 0.002403846 0.002392344 0.002386635 0.002386635 0.002380952
## IL3 BCL2L1 CCND1 SOCS3 CSF3 SOCS1
## 0.002364066 0.002364066 0.002364066 0.002358491 0.002352941 0.002352941
## MTOR EPO CEBPA GZMB MPL IL2
## 0.002352941 0.002347418 0.002331002 0.002331002 0.002325581 0.002288330
## CCNA2 IFNA1 PLK1 KMT2A CD8A NRAS
## 0.002277904 0.002242152 0.002232143 0.002217295 0.002212389 0.002212389
## TCN1 PTEN MAPK8 MAPK9 CD4 MAPK14
## 0.002212389 0.002207506 0.002192982 0.002192982 0.002192982 0.002183406
## BCR ABL2 CDK2 GATA1 PTPRC GATA2
## 0.002183406 0.002183406 0.002178649 0.002173913 0.002173913 0.002169197
## TAL1 PML CBFB MECOM KITLG NF1
## 0.002169197 0.002164502 0.002155172 0.002155172 0.002100840 0.002083333
## SOCS2 DNMT3A CD34 CCND2 DOK1 HLA-A
## 0.002049180 0.002044990 0.002036660 0.001964637 0.001941748 0.001934236
## CDKN1C GAPDH FCGR3B CASP3 CASP9 BCL2L11
## 0.001915709 0.001912046 0.001904762 0.001886792 0.001883239 0.001879699
## CSF2 THPO ICOSLG MCL1 EIF4EBP1 IKZF1
## 0.001851852 0.001831502 0.001831502 0.001824818 0.001814882 0.001706485
## RARA KIR3DL1 CD33 BIRC3
## 0.001700680 0.001555210 0.001536098 0.001524390
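We see that HOXA9 and MEIS1 are jointly at the top. They belong to small components that are disconnected from the main network, so the few nodes they can reach are all very close, which inflates their closeness score.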
Try and answer quiz question 5 on Slido!
Betweenness centrality describes how much influence a node has over the flow through the network, measured by how often it lies on the shortest paths between other nodes. We use the “betweenness()” function from igraph to calculate the betweenness centrality of the CML network.
# Betweenness of every vertex, printed from the highest to the lowest score.
betweenness_centrality <- betweenness(g)
sort(betweenness_centrality, decreasing = TRUE, na.last = TRUE)
## SRC HSP90AA1 GRB2 RUNX1 STAT3 TP53
## 1657.0074563 1453.6903142 1050.4800924 1034.5827514 1030.8493079 959.3456120
## JAK2 EGFR PTPN11 CREBBP LCK SHC1
## 938.9748795 747.2673912 672.8506374 480.8040049 465.0660305 462.0491129
## ABL1 JAK1 APAF1 LYN HRAS MAPK1
## 428.3986606 414.7631181 407.2044758 406.9505766 290.1139735 258.4301585
## PTK2 CD19 CBL FOXO3 BCL2L1 RASA1
## 253.2915905 250.5836441 237.0842378 224.0694477 205.6064147 163.8690773
## HSPA8 CRK FN1 CALR CDKN1A HLA-A
## 163.6236726 161.1206687 154.6942667 152.1455128 147.6236060 134.9464286
## CRKL EPOR ERBB2 BCL2 PML FCGR3B
## 133.6058462 133.4761884 130.2126259 129.6477371 126.0000000 126.0000000
## CASP3 GATA1 MTOR CTLA4 CTNNB1 CCND1
## 126.0000000 126.0000000 126.0000000 126.0000000 118.4693804 111.7086459
## CD8A STAT5A HSPA4 CSF3 NPM1 IL3
## 103.9259158 99.1687605 92.2768404 86.2796157 76.5131758 75.8938316
## DNMT1 KRAS RAF1 NTRK1 CCNA2 STAT1
## 75.4817433 65.4964644 61.5551469 58.8162393 53.6811661 52.3333594
## MYC ITGB3 EPO CEBPB CDK2 KIT
## 49.5182567 49.4247666 48.3155878 46.8658560 43.3722410 41.6476199
## BRAF PTPN6 PDGFRB MPL IL6 IL2
## 40.9770998 38.9381370 36.2654259 35.7033616 32.3549302 31.7377351
## PIK3CG ACTB CEBPA GAB2 STAT5B ITGB1
## 29.6522564 28.5243416 23.8592720 23.8188312 21.6380912 18.0568941
## FLT3 PDGFRA CREB1 SOCS3 SOCS1 NRAS
## 17.9818352 16.1279408 13.2530525 12.4693962 12.2193962 8.9278250
## CSF3R HCK KITLG PTK2B AKT1 LEF1
## 8.8476558 8.2633700 7.5462185 6.5235160 5.5595410 4.8712121
## SOCS2 CSF2 CCND2 BCL2L11 PRTN3 CDKN1C
## 3.7293040 3.7000000 2.7435953 2.2750073 2.0000000 1.6167249
## THPO ALK CCL3 INPP5D DNMT3A MAPK14
## 1.3333333 1.2269231 1.0000000 0.5836441 0.5000000 0.0000000
## TNFSF10 TNFRSF10B ELANE CD177 MPO TNFRSF10A
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## NF1 KMT2A TCN1 IRF4 RARA PLK1
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## HOXA9 DOK1 PTPN1 GATA2 TAL1 CBFB
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## MECOM MAPK8 MAPK9 BIRC3 CASP9 PTEN
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## MEIS1 CD34 EIF4EBP1 GZMB BCR NTRK3
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## ABL2 AXL BMI1 MCL1 ASXL1 IKZF1
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## SIRPA KIR3DL1 CD4 CD33 GAPDH PTPRC
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## CCR6 CCR7 ICOSLG IFNA1
## 0.0000000 0.0000000 0.0000000 0.0000000
Try and answer quiz question 6 on Slido!
Eigenvector centrality describes how influential a node is, taking into account how influential its neighbours are. We use the “evcent()” function from igraph to calculate the eigenvector centrality of the CML network.
You might notice that the algorithm returns a list rather than a plain vector. To overcome this, we need to extract the actual result, which is stored in the element “vector”.
# eigen_centrality() is the current name of evcent(), which is deprecated in
# recent igraph releases. The function returns a list; the per-vertex scores
# are stored in its `vector` element.
eigenvector_centrality <- eigen_centrality(g)$vector
eigenvector_centrality[order(eigenvector_centrality,
  decreasing = TRUE
)]
## GRB2 EGFR SRC PTPN11 SHC1 JAK2
## 1.0000000000 0.9702884779 0.9504358849 0.8954393915 0.8336152932 0.8332010860
## STAT3 JAK1 CBL CRK LYN PTK2
## 0.6944604013 0.6441682042 0.6359699302 0.5537366975 0.5475638723 0.5415599374
## CRKL STAT5A ERBB2 STAT5B STAT1 PDGFRA
## 0.5165170653 0.4998364366 0.4904465755 0.4823431644 0.4633564687 0.4523737699
## EPOR PTK2B PDGFRB ABL1 HSP90AA1 LCK
## 0.4214892083 0.3854504027 0.3565130104 0.3423456421 0.3416127459 0.3403185440
## CTNNB1 GAB2 PTPN6 HCK FN1 CREBBP
## 0.3091495041 0.3063665997 0.2963098546 0.2733833719 0.2483516026 0.2328757960
## MAPK1 ITGB3 PTPN1 IL6 RUNX1 INPP5D
## 0.2303542673 0.2278370631 0.2192058018 0.2130446614 0.1984769692 0.1933983061
## KIT NTRK1 SIRPA NTRK3 TP53 CSF3R
## 0.1803556394 0.1797405745 0.1780128928 0.1767305733 0.1749920129 0.1721225467
## SOCS3 ITGB1 AXL ALK SOCS1 CD19
## 0.1709341938 0.1667084587 0.1584135296 0.1577167812 0.1392725718 0.1265326752
## FOXO3 RASA1 RAF1 HRAS AKT1 EPO
## 0.1238377906 0.1209598280 0.1162747439 0.1155240374 0.1132254020 0.1113788809
## NPM1 HSPA4 CTLA4 IL3 MAPK9 MAPK8
## 0.1082420118 0.1020171901 0.0974360888 0.0961985473 0.0946096881 0.0946096881
## CSF3 GZMB FLT3 IL2 CEBPB DNMT1
## 0.0942923383 0.0925888191 0.0866176530 0.0792147930 0.0746169713 0.0713690781
## MPL SOCS2 ACTB LEF1 HSPA8 PIK3CG
## 0.0694873345 0.0664630665 0.0645285105 0.0601432629 0.0583365134 0.0568725721
## IRF4 IFNA1 TCN1 BRAF KRAS CDKN1A
## 0.0564037630 0.0523190532 0.0516531931 0.0504467795 0.0477046696 0.0464228829
## PTEN MYC APAF1 NRAS KITLG CALR
## 0.0439852557 0.0427109494 0.0409772462 0.0371446131 0.0370600563 0.0364763876
## CREB1 CD8A CCND1 CD4 BCL2 MTOR
## 0.0348932101 0.0305318401 0.0303684909 0.0301203022 0.0292708694 0.0279298770
## BCR ABL2 PTPRC CEBPA BCL2L1 THPO
## 0.0278051598 0.0278051598 0.0276405198 0.0257696290 0.0243526956 0.0223482546
## CSF2 KMT2A CDK2 CCNA2 CD34 GATA1
## 0.0219053745 0.0217480788 0.0212516962 0.0202393271 0.0201710060 0.0193762933
## GATA2 TAL1 MAPK14 PML MECOM CBFB
## 0.0192580769 0.0192580769 0.0187092705 0.0162272556 0.0161202106 0.0161202106
## PLK1 NF1 FCGR3B DOK1 DNMT3A CCND2
## 0.0142127730 0.0132573625 0.0103451704 0.0098243031 0.0092655288 0.0084930694
## ICOSLG CDKN1C HLA-A GAPDH BCL2L11 CASP3
## 0.0079137155 0.0065262009 0.0054785179 0.0047380655 0.0045459150 0.0036465748
## CASP9 MCL1 EIF4EBP1 IKZF1 RARA CD33
## 0.0036243268 0.0023471322 0.0022684521 0.0015737339 0.0013179704 0.0008402301
## KIR3DL1 BIRC3 TNFSF10 TNFRSF10B PRTN3 ELANE
## 0.0004449628 0.0002961732 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## CD177 MPO TNFRSF10A CCL3 HOXA9 MEIS1
## 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## BMI1 ASXL1 CCR6 CCR7
## 0.0000000000 0.0000000000 0.0000000000 0.0000000000
Try and answer quiz question 7 on Slido!
We extracted the interactome from a non-disease database, so the edges may only be applicable to normal settings. We are interested in disease-associated interactions. One way to find them is to seed a random walk with restart (RWR) from the dysregulated proteins, in our case BCR and ABL1 (as there is no node called BCR/ABL1).
## Create a vector of importance
# 1 for the vertex named "ABL1", 0 for every other vertex.
ABL1_PPR_vector <- as.numeric(V(g)$name == "ABL1")
ABL1_PPR_vector
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# Personalised PageRank seeded at ABL1; page_rank() returns a list whose
# `vector` element holds the per-vertex scores. Printed from highest to lowest.
ABL1_PPR <- page_rank(g, personalized = ABL1_PPR_vector)$vector
sort(ABL1_PPR, decreasing = TRUE, na.last = TRUE)
## ABL1 GRB2 SHC1 CRK CRKL SRC
## 0.2003673067 0.0528574566 0.0458749224 0.0307561526 0.0302908107 0.0267461299
## HSP90AA1 EGFR FN1 CTNNB1 CBL NTRK1
## 0.0267120163 0.0260221126 0.0232360835 0.0220142712 0.0215505564 0.0206331527
## PTPN11 NTRK3 JAK2 STAT3 PTK2 BCR
## 0.0197411646 0.0188890406 0.0169055096 0.0159265991 0.0156832102 0.0154829282
## ABL2 LYN JAK1 ERBB2 TP53 PTK2B
## 0.0154829282 0.0131186963 0.0119736010 0.0117193851 0.0099586914 0.0098233079
## PDGFRA ITGB3 CREBBP STAT5A EPOR STAT5B
## 0.0088603083 0.0088329827 0.0086244468 0.0086080212 0.0083537674 0.0079248138
## LCK ITGB1 STAT1 RUNX1 GAB2 KITLG
## 0.0072409179 0.0071066444 0.0067846673 0.0063780105 0.0063257452 0.0061556895
## MAPK8 MAPK9 FOXO3 HSPA4 PDGFRB MAPK1
## 0.0060337115 0.0060337115 0.0059397254 0.0057552829 0.0056277542 0.0053573364
## KIT PTPN6 HSPA8 ACTB ALK HRAS
## 0.0052579733 0.0050004369 0.0045511694 0.0045232936 0.0043436717 0.0042315113
## INPP5D CDKN1A IL3 HCK LEF1 APAF1
## 0.0041495052 0.0039655497 0.0038460310 0.0036389245 0.0034572266 0.0034080076
## IL6 NPM1 RAF1 KRAS SIRPA AKT1
## 0.0033173542 0.0033090327 0.0032292599 0.0030808910 0.0030567126 0.0029923244
## CDK2 SOCS3 RASA1 GZMB CD19 CD34
## 0.0029730912 0.0029684313 0.0029526445 0.0029322188 0.0028252833 0.0028215244
## CCND1 BRAF SOCS2 CALR MYC AXL
## 0.0027878523 0.0026503053 0.0025914169 0.0025586309 0.0024540705 0.0024236630
## PTPN1 CSF3R CSF3 SOCS1 IL2 MTOR
## 0.0023855027 0.0023692351 0.0023670203 0.0022787583 0.0022512622 0.0022216452
## BCL2L1 NRAS FLT3 EPO CTLA4 BCL2
## 0.0022151246 0.0021611337 0.0020999464 0.0020958472 0.0019856540 0.0019776525
## CEBPA DNMT1 CREB1 CCNA2 CD8A HLA-A
## 0.0018104911 0.0017475004 0.0016811650 0.0016777093 0.0016089506 0.0015554091
## CEBPB CCND2 CSF2 PIK3CG CDKN1C THPO
## 0.0015490109 0.0015435996 0.0015346119 0.0015104282 0.0014404921 0.0012656990
## FCGR3B GATA1 TCN1 DNMT3A KMT2A CD4
## 0.0012532242 0.0012420229 0.0012211982 0.0011904451 0.0011427646 0.0011397338
## PTEN CASP3 GATA2 TAL1 MPL EIF4EBP1
## 0.0011108941 0.0010770905 0.0009986590 0.0009986590 0.0009833835 0.0009441992
## BCL2L11 CASP9 NF1 IRF4 PML PTPRC
## 0.0008906253 0.0007879767 0.0007737509 0.0007125058 0.0007072810 0.0006838645
## MAPK14 GAPDH IFNA1 MCL1 ICOSLG CD33
## 0.0006505337 0.0006447490 0.0005986800 0.0005661532 0.0005626020 0.0005326203
## PLK1 DOK1 CBFB MECOM KIR3DL1 BIRC3
## 0.0005290555 0.0005019496 0.0004517757 0.0004517757 0.0004406992 0.0003051756
## RARA IKZF1 TNFSF10 TNFRSF10B PRTN3 ELANE
## 0.0003005944 0.0002639299 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## CD177 MPO TNFRSF10A CCL3 HOXA9 MEIS1
## 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## BMI1 ASXL1 CCR6 CCR7
## 0.0000000000 0.0000000000 0.0000000000 0.0000000000
What would the code be for starting at “BCR” instead of “ABL1”?
Try and answer quiz question 8 on Slido!
## Create a vector of importance
# 1 for the vertices named "BCR" or "ABL1", 0 for every other vertex.
CML_PPR_vector <- as.numeric(V(g)$name %in% c("BCR", "ABL1"))
# Personalised PageRank seeded at both proteins; the per-vertex scores are in
# the `vector` element of the returned list. Printed from highest to lowest.
CML_PPR <- page_rank(g, personalized = CML_PPR_vector)$vector
sort(CML_PPR, decreasing = TRUE, na.last = TRUE)
## ABL1 BCR GRB2 SHC1 CRK CRKL
## 0.1853397587 0.0893217086 0.0488931473 0.0424343033 0.0284494412 0.0280189999
## SRC HSP90AA1 EGFR FN1 CTNNB1 CBL
## 0.0247401701 0.0247086150 0.0240704541 0.0214933772 0.0203632008 0.0199342646
## NTRK1 PTPN11 NTRK3 JAK2 STAT3 PTK2
## 0.0190856662 0.0182605773 0.0174723626 0.0156375964 0.0147321042 0.0145069695
## ABL2 LYN JAK1 ERBB2 TP53 PTK2B
## 0.0143217086 0.0121347941 0.0110755809 0.0108404313 0.0092117896 0.0090865598
## PDGFRA ITGB3 CREBBP STAT5A EPOR STAT5B
## 0.0081957851 0.0081705090 0.0079776133 0.0079624196 0.0077272349 0.0073304528
## LCK ITGB1 STAT1 RUNX1 GAB2 KITLG
## 0.0066978490 0.0065736460 0.0062758172 0.0058996597 0.0058513143 0.0056940128
## MAPK8 MAPK9 FOXO3 HSPA4 PDGFRB MAPK1
## 0.0055811831 0.0055811831 0.0054942460 0.0053236367 0.0052056727 0.0049555361
## KIT PTPN6 HSPA8 ACTB ALK HRAS
## 0.0048636253 0.0046254041 0.0042098317 0.0041840466 0.0040178963 0.0039141479
## INPP5D CDKN1A IL3 HCK LEF1 APAF1
## 0.0038382923 0.0036681335 0.0035575787 0.0033660052 0.0031979346 0.0031524070
## IL6 NPM1 RAF1 KRAS SIRPA AKT1
## 0.0030685526 0.0030608553 0.0029870654 0.0028498242 0.0028274592 0.0027679000
## CDK2 SOCS3 RASA1 GZMB CD19 CD34
## 0.0027501094 0.0027457990 0.0027311962 0.0027123024 0.0026133871 0.0026099101
## CCND1 BRAF SOCS2 CALR MYC AXL
## 0.0025787633 0.0024515324 0.0023970606 0.0023667336 0.0022700152 0.0022418883
## PTPN1 CSF3R CSF3 SOCS1 IL2 MTOR
## 0.0022065900 0.0021915425 0.0021894937 0.0021078514 0.0020824176 0.0020550218
## BCL2L1 NRAS FLT3 EPO CTLA4 BCL2
## 0.0020489903 0.0019990486 0.0019424504 0.0019386587 0.0018367300 0.0018293285
## CEBPA DNMT1 CREB1 CCNA2 CD8A HLA-A
## 0.0016747043 0.0016164379 0.0015550776 0.0015518811 0.0014882793 0.0014387534
## CEBPB CCND2 CSF2 PIK3CG CDKN1C THPO
## 0.0014328350 0.0014278296 0.0014195160 0.0013971461 0.0013324552 0.0011707716
## FCGR3B GATA1 TCN1 DNMT3A KMT2A CD4
## 0.0011592324 0.0011488712 0.0011296083 0.0011011617 0.0010570573 0.0010542538
## PTEN CASP3 GATA2 TAL1 MPL EIF4EBP1
## 0.0010275770 0.0009963087 0.0009237596 0.0009237596 0.0009096298 0.0008733843
## BCL2L11 CASP9 NF1 IRF4 PML PTPRC
## 0.0008238284 0.0007288785 0.0007157196 0.0006590678 0.0006542349 0.0006325746
## MAPK14 GAPDH IFNA1 MCL1 ICOSLG CD33
## 0.0006017437 0.0005963928 0.0005537790 0.0005236917 0.0005204068 0.0004926738
## PLK1 DOK1 CBFB MECOM KIR3DL1 BIRC3
## 0.0004893763 0.0004643034 0.0004178926 0.0004178926 0.0004076468 0.0002822875
## RARA IKZF1 TNFSF10 TNFRSF10B PRTN3 ELANE
## 0.0002780498 0.0002441351 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## CD177 MPO TNFRSF10A CCL3 HOXA9 MEIS1
## 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## BMI1 ASXL1 CCR6 CCR7
## 0.0000000000 0.0000000000 0.0000000000 0.0000000000
Try and answer quiz question 9 on Slido!
# Combine the five per-vertex measures into one table, one column per measure.
# The vectors are bound by position, so this assumes they are all in the same
# vertex order.
ppr <- as.data.frame(CML_PPR)
centrality_result <- cbind(
  ppr,
  degree_centrality,
  eigenvector_centrality,
  closeness_centrality,
  betweenness_centrality
)
rmarkdown::paged_table(centrality_result) ## don't run this

# The assignment below needs its own line: fused onto the end of the comment
# above it is never executed and `centrality_average` does not exist.
# NOTE(review): the measures are on very different scales (betweenness reaches
# the thousands, the others stay below 60), so this plain mean is dominated by
# betweenness; rescale the columns first if an equally weighted score is
# intended.
centrality_average <- rowMeans(centrality_result)
centrality_average[order(centrality_average,
  decreasing = TRUE
)]
## SRC HSP90AA1 GRB2 STAT3 RUNX1 TP53
## 341.9972840 297.2119916 221.9064844 213.9124043 211.7580173 198.3065843
## JAK2 EGFR PTPN11 SHC1 CREBBP LCK
## 197.1653972 158.8530543 143.1535409 100.9856904 100.6095852 96.6832082
## ABL1 JAK1 LYN APAF1 HRAS PTK2
## 90.1858716 89.8842974 87.5026900 83.8502125 61.6472394 55.5701431
## MAPK1 CBL CD19 FOXO3 BCL2L1 CRK
## 54.5336961 53.5486419 51.3430568 47.6403115 43.5270361 37.1411644
## HSPA8 RASA1 FN1 CDKN1A CALR CRKL
## 35.1377476 34.7990678 33.7933703 32.3352432 31.6373485 31.6306545
## EPOR ERBB2 HLA-A BCL2 CTNNB1 GATA1
## 30.7816259 29.7434115 28.1910560 27.9362553 26.9603651 26.8045398
## CTLA4 CASP3 MTOR PML FCGR3B CCND1
## 26.4203308 26.4013059 26.0064676 26.0038092 26.0026818 24.7487914
## STAT5A CD8A HSPA4 CSF3 IL3 NPM1
## 23.9359179 21.9920297 21.2773682 19.2756901 17.1991904 16.5254498
## DNMT1 KRAS RAF1 STAT1 NTRK1 CCNA2
## 16.3114639 15.9098811 14.7354267 14.1611901 13.4035367 12.3410471
## ITGB3 CDK2 EPO MYC CEBPB PTPN6
## 12.3327058 11.4796843 11.2862506 11.1131402 10.5889089 10.2483478
## KIT BRAF PDGFRB IL6 STAT5B IL2
## 9.9670742 9.8064910 9.7260036 8.5147268 8.0261429 7.9642641
## MPL ACTB PIK3CG GAB2 PDGFRA CEBPA
## 7.9552168 7.3190998 7.1425860 6.8267716 6.5182903 6.3778095
## ITGB1 SOCS3 FLT3 PTK2B SOCS1 CREB1
## 5.6465468 4.5290869 4.4145633 4.1841853 4.0726259 3.8583809
## NRAS HCK CSF3R KITLG SOCS2 AKT1
## 3.7938362 3.7086104 3.4048830 2.7182147 2.3600427 2.3356624
## LEF1 CCND2 CSF2 CDKN1C PRTN3 BCL2L11
## 2.1873891 2.1510962 1.9450353 1.9252999 1.6666667 1.6564514
## ALK THPO INPP5D PTPN1 NTRK3 SIRPA
## 1.4782418 1.4717368 1.3566787 1.2448116 1.2393587 1.2366757
## MAPK8 MAPK9 GATA2 TAL1 CCL3 DNMT3A
## 1.2204768 1.2204768 1.2044702 1.2044702 1.1000000 0.9024823
## TNFSF10 TNFRSF10B TNFRSF10A ELANE MPO AXL
## 0.9000000 0.9000000 0.9000000 0.8500000 0.8500000 0.8326775
## GZMB CD4 KMT2A NF1 CASP9 MCL1
## 0.8195264 0.8066735 0.8050045 0.8032113 0.8012473 0.8009391
## HOXA9 MEIS1 BMI1 ASXL1 CCR6 CCR7
## 0.6000000 0.6000000 0.6000000 0.6000000 0.4666667 0.4666667
## CD177 BCR IRF4 IFNA1 TCN1 PTEN
## 0.4400000 0.4238621 0.4119004 0.4110230 0.4109990 0.4094441
## ABL2 PTPRC CD34 MAPK14 CBFB MECOM
## 0.4088621 0.4060894 0.4049635 0.4042989 0.4037387 0.4037387
## PLK1 DOK1 ICOSLG GAPDH EIF4EBP1 IKZF1
## 0.4033869 0.4024461 0.4020531 0.4014493 0.4009913 0.4007049
## RARA CD33 KIR3DL1 BIRC3
## 0.4006593 0.4005738 0.4004816 0.4004206
Try and answer quiz question 10 on Slido!
https://www.genecards.org/cgi-bin/carddisp.pl?gene=HSP90AA1&keywords=HSP90AA1
Try and answer quiz question 11 on Slido!
https://go.drugbank.com/bio_entities/BE0001120
Try and answer quiz question 12 on Slido!
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United Kingdom.utf8
## [2] LC_CTYPE=English_United Kingdom.utf8
## [3] LC_MONETARY=English_United Kingdom.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United Kingdom.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] data.table_1.14.8 dplyr_1.1.3 igraph_1.5.1
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.15.0 knitr_1.45 magrittr_2.0.3 tidyselect_1.2.0
## [5] R6_2.5.1 rlang_1.1.1.9000 fastmap_1.1.1 fansi_1.0.5
## [9] highr_0.10 tools_4.2.2 xfun_0.40 R.oo_1.25.0
## [13] utf8_1.2.3 cli_3.6.1 jquerylib_0.1.4 htmltools_0.5.6.1
## [17] yaml_2.3.7 digest_0.6.31 tibble_3.2.1 lifecycle_1.0.3
## [21] bookdown_0.36 R.utils_2.12.2 sass_0.4.7 vctrs_0.6.4
## [25] curl_5.1.0 glue_1.6.2 cachem_1.0.8 evaluate_0.22
## [29] rmarkdown_2.25 compiler_4.2.2 bslib_0.5.1 pillar_1.9.0
## [33] R.methodsS3_1.8.2 generics_0.1.3 rmdformats_1.0.4 jsonlite_1.8.7
## [37] pkgconfig_2.0.3